
***********************************************************************************************;
*   Code Name: Part1InitializeLinda.sas
  	Authors: Per Siden and Thomas Jansson (Sveriges Riksbank)
	Created: January 30, 2013
	Last edited: March 30, 2017;
***********************************************************************************************;

* This is the first step in a series of codes used in this project.
  The original thought is to run the codes in the following order:
	1. Part1InitializeLinda.sas
	2. Part2IncludeAssets.sas
	3. Part3CreateHHVariables.sas
	4. Part4RegionalImmigrantRatio.sas

  In this first file, we will extract the useful variables from the main LINDA data (not wealth/asset-data)
  for the years 1987-1998 and 1999-2007 and throw away the variables we do not need. We will also create some new
  variables which are based on the ones in the data-set and rename them to understandable names.

  STEP 1: Read from original file and construct new variables.
  STEP 2: Correct and construct country-variables in english.
  STEP 3: Correct and construct education variables.
  STEP 4: Add variable for spouses' countries.

***********************************************************************************************;
* Define what variables to extract, some of them year-specific;

* Source variables read for every year 1999-2007;
%let varsallyears = nybidnr nybidnrf bald binvar bant
					bkon cdispl cdisplh bfoab blkfnov
					bemigdat bdoddat akupens pkupens istudsfl
					tstudl bkuinst bkungr tarbstl tkulon
					bobjtyp tkulonf byrku bciv bfamst
					bantf banti bantui burvkodf burvkodi
					tforpl cprim tfoab ppensspl ppenssfl;

* Base list for 1987-1992, and the additions for 1999, 2000, 2001 and 2002 onwards;
%let vars1987 = nybidnr alder bant civ bbidr;
%let vars1999 = bfoland bnation bsun;
%let vars2000 = bfoland bnation bsunniv bsuninr;
%let vars2001 = bfoland bnationt bsunniv bsuninr;
%let varspost2001 = bfland bnat bfodln bnation bsunniv bsuninr;

* Government student aid values (subsidies for one semester) from CSN, one macro variable per year;
%let maxstud1999 = 8878;
%let maxstud2000 = 8927;
%let maxstud2001 = 9000;
%let maxstud2002 = 11440;
%let maxstud2003 = 11640;
%let maxstud2004 = 11860;
%let maxstud2005 = 11880;
%let maxstud2006 = 11980;
%let maxstud2007 = 12640;
***********************************************************************************************;

* Clear the log and output windows (display manager commands);
DM 'CLEAR LOG';
DM 'CLEAR OUTPUT';

* Delete every member of the WORK library. QUIT ends the procedure here
  instead of leaving it active until the next step boundary;
Proc datasets LIB = work KILL;
quit;

***********************************************************************************************;
* STEP 1
* The macros in this step extract and create the variables of interest that come from the panel data;
***********************************************************************************************;

* Build, for each year 1985-1998, an age table and a person-to-household link table;
%macro m0bbbaaa(f_or_i);
/* f_or_i: suffix selecting the input tables original.linda<f_or_i><year>;
   the macro is invoked below with f and with i.
   For every year it creates two WORK tables, both sorted by nybidnr:
     age<f_or_i><year>          nybidnr, age, yearborn
     lindaFAMILY<f_or_i><year>  nybidnr, nybidnrh and the sampling code
                                (BURVKODF for f, BURVKODI for i)          */
%do I = 1985 %to 1998;

	/* ---- age and year of birth ------------------------------------- */
	%if &I >= 1993 %then %do;
		/* from 1993 on the age is read from BALD */
		data age&f_or_i&I;
			set original.linda&f_or_i&I (keep = nybidnr bald);
			yearborn = &I - bald;
			rename bald = age;
		run;
	%end;
	%else %do;
		/* before 1993 the age is read from ALDER; the multiplication by 1
		   yields a numeric AGE */
		data age&f_or_i&I (drop = alder);
			set original.linda&f_or_i&I (keep = nybidnr alder);
			age = alder*1;
			yearborn = &I - age;
		run;
	%end;

	proc sort data = age&f_or_i&I;
		by nybidnr;
	run;

	/* ---- link nybidnr to the household id (and sample status) ------- */
	%if &f_or_i = i %then %do;
		/* the loop ends at 1998, so only the pre-1999 layout occurs here */
		data lindaFAMILY&f_or_i&I;
			set original.linda&f_or_i&I (keep   = nybidnr nybidnrh BURVKODU
			                             rename = (BURVKODU = BURVKODI));
		run;
	%end;

	%if &f_or_i = f %then %do;
		%if &I <= 1990 %then %do;
			data lindaFAMILY&f_or_i&I;
				set original.linda&f_or_i&I (keep   = nybidnr nybidnrh BURVKODP
				                             rename = (BURVKODP = BURVKODF));
			run;
		%end;
		%else %do;
			/* from 1991 on the household id is stored as nybidnrf */
			data lindaFAMILY&f_or_i&I;
				set original.linda&f_or_i&I (keep   = nybidnr nybidnrf BURVKODF
				                             rename = (nybidnrf = nybidnrh));
			run;
		%end;
	%end;

	proc sort data = lindaFAMILY&f_or_i&I;
		by nybidnr;
	run;

%end;
%mend;
%m0bbbaaa(f);
%m0bbbaaa(i);


* For each year 1985-1998, attach to every person the latest birth year found in the household;
%macro m0bbbab(f_or_i);
/* f_or_i: table suffix (invoked below with f and with i).
   Reads WORK.lindaFAMILY<f_or_i><year> and WORK.age<f_or_i><year>, both
   expected to be sorted by nybidnr, and writes
   temp.youngesthh<f_or_i><year> (id, youngest_hh<year>) sorted by id.     */
%do I = 1985 %to 1998;

	/* persons with household id and age; only rows with age > 0 are kept
	   (rows with missing age fail the test as well) */
	data lindaage&f_or_i&I;
		merge lindaFAMILY&f_or_i&I age&f_or_i&I;
		by nybidnr;
		if age > 0;
	run;

	/* latest birth year first within each household ... */
	proc sort data = lindaage&f_or_i&I;
		by nybidnrh descending yearborn;
	run;

	/* ... so that dropping duplicate keys keeps that row */
	proc sort data = lindaage&f_or_i&I out = youngest&f_or_i&I nodupkey;
		by nybidnrh;
	run;

	data youngest&f_or_i&I (keep = nybidnrh youngest_hh&I);
		set youngest&f_or_i&I (rename = (yearborn = youngest_hh&I));
	run;

	/* spread the household value back to every household member */
	proc sort data = lindaFAMILY&f_or_i&I;
		by nybidnrh;
	run;

	data merged&f_or_i&I;
		merge lindaFAMILY&f_or_i&I youngest&f_or_i&I;
		by nybidnrh;
	run;

	data temp.youngesthh&f_or_i&I (keep = id youngest_hh&I);
		set merged&f_or_i&I (rename = (nybidnr = id));
	run;

	proc sort data = temp.youngesthh&f_or_i&I;
		by id;
	run;

%end;
%mend;
%m0bbbab(f);
%m0bbbab(i);

***********************************************************************************************;
* Create individual variables for period 1987 to 1998;

%macro m1aa_edu(y, yearvar);
	/* Helper for m1aa. Emits DATA step statements that
	     - map the one-digit level code held in EDULEVEL to edu<y>:
	       5-7 -> 3, 3-4 -> 2, 1-2 -> 1, 8-9 -> 0, anything else stays missing;
	     - copy <yearvar> into edu_year<y> and add 1900 to values between 0 and 100.
	   The original chain also contained an "edulevel=1 -> 0" branch placed
	   after the 1-2 test; it could never be reached and has been removed.
	   NOTE(review): if level code 0 was meant to map to 0, add that branch. */
	if 5<=edulevel=<7 then edu&y = 3;
	else if 3<=edulevel=<4 then edu&y = 2;
	else if 1<=edulevel=<2 then edu&y = 1;
	else if 8<=edulevel=<9 then edu&y = 0;
	edu_year&y = &yearvar*1;
	if 0<edu_year&y<100 then edu_year&y = 1900 + edu_year&y;
%mend m1aa_edu;

%macro m1aa(f_or_i);
/* f_or_i: table suffix (invoked below with f and with i).
   For each year 1987-1998 writes temp.linda<f_or_i><year>, sorted by id:
     1987-1992: hh_size, hh_children, hh_kids16, hh_kids1718, hh_kidallow,
                married and, from 1990 on, edu / edu_year / edulevel;
     1993-1998: edu / edu_year / edulevel, hh_kids16, hh_kids1718.
   For 1994-1998 it also writes temp.hhincome<f_or_i><year>.
   Finally everything is merged by id, together with the
   temp.youngesthh<f_or_i><year> tables, into temp.arrivalyears<f_or_i>.

   Changes compared with the previous version:
     - The zero-padding of BSUN needed for original.lindaf1996 is applied
       inside the yearly loop. Before, temp.lindaf1996 was rebuilt after the
       loop, which threw away hh_kids161996 and hh_kids17181996 and was
       repeated needlessly during the i pass.
     - Loop and work macro variables are declared %local.                  */
%local y familyid vars_to_keep;
%do y = 1987 %to 1998;

/* Name of the household id in the source table: nybidnrf for the f tables
   from 1991 on, nybidnrh otherwise */
%if (&f_or_i = f) and (&y > 1990) %then %let familyid = nybidnrf;
%else %let familyid = nybidnrh;

%if &y<1993 %then %do;

	/* Source variables available in this year */
	%if &y = 1987 %then %let vars_to_keep = &familyid &vars1987;
	%else %if &y < 1990 %then %let vars_to_keep = &familyid &vars1987 barny barnae;
	%else %let vars_to_keep = &familyid &vars1987 utbildn utbar barny barnae;

	data linda&f_or_i&y;
		set original.linda&f_or_i&y(keep = &vars_to_keep);

		year = &y;
		label year = 'Year';
		age = alder*1;
		label age = 'Age';
		hh_size&y = bant;
		label hh_size&y = 'Household size';

		/* the child counts are not read for 1987 and are set to missing */
		%if &y>1987 %then %do;
			kids16 = barny*1;
			kids1718 = barnae*1;
			drop barny barnae;
		%end;
		%else %do;
			kids16 = .;
			kids1718 = .;
		%end;
		label kids16 = 'Nr kids under 16';
		label kids1718 = 'Nr kids 17 to 18';

		rename nybidnr = id &familyid = idhh;
		drop alder bant;

		/* written as separate comparisons so that the automatic type
		   conversion keeps working whatever the storage type of CIV is */
		if CIV = 02 or CIV = 03 or CIV = 07 then married&y = 1;
		else married&y = 0;
		label married&y = 'Married (dummy)';

		/* education is available from 1990; from 1991 the level is the
		   second character of UTBILDN */
		%if &y > 1989 %then %do;
			%if &y = 1990 %then %do;
				edulevel = utbildn;
			%end;
			%else %do;
				edulevel = substr(utbildn,2,1);
			%end;
			%m1aa_edu(&y, utbar)
			drop utbar;
			rename edulevel = edulevel&y;
		%end;
	run;

	proc sort data =  linda&f_or_i&y;
		by idhh id;
	run;

	/* one row per household with the household totals */
	data lindahh&f_or_i&y;
		set linda&f_or_i&y;
		by idhh;
		if first.idhh then do;
			hh_children&y = 0;
			hh_kids16&y = 0;
			hh_kids1718&y = 0;
			hh_kidallow&y = 0;
		end;

		hh_kidallow&y + bbidr;

		if age < 18 then hh_children&y + 1;
		if kids16>0 then hh_kids16&y + 1;
		if kids1718>0 then hh_kids1718&y + 1;

		if last.idhh then output;
		keep idhh hh_children&y hh_kids16&y hh_kids1718&y hh_kidallow&y;
	run;

	data temp.linda&f_or_i&y;
		merge linda&f_or_i&y lindahh&f_or_i&y;
			by idhh;
		keep id %if &y > 1989 %then %do; edu&y edu_year&y edulevel&y %end; hh_size&y hh_children&y hh_kids16&y hh_kids1718&y hh_kidallow&y married&y;
	run;

%end;
%else %do;		/* 1993-1998 */

	data linda&f_or_i&y;
		set original.linda&f_or_i&y(keep = nybidnr &familyid bsun bsunar bbrn15 bbrn167);
		idhh = &familyid;

		kids16 = bbrn15*1;
		label kids16 = 'Nr kids under 16';
		kids1718 = bbrn167*1;
		label kids1718 = 'Nr kids 17 to 18';

		%if (&f_or_i = f) and (&y = 1996) %then %do;
			/* lindaf1996 only: rebuild a five-character code from BSUN
			   (add 100000, format with 6., drop the leading digit) before
			   taking the level digit */
			length bsunnew $5;
			bsunnew = substr(put(bsun + 100000, 6.), 2, 5);
			edulevel = substr(bsunnew,2,1);
		%end;
		%else %do;
			edulevel = substr(bsun,2,1);
		%end;
		%m1aa_edu(&y, bsunar)

		rename nybidnr = id edulevel = edulevel&y;
		keep nybidnr idhh edu&y edu_year&y edulevel kids16 kids1718;
	run;

	proc sort data =  linda&f_or_i&y;
		by idhh id;
	run;

	/* one row per household with the household totals */
	data lindahh&f_or_i&y;
		set linda&f_or_i&y;
		by idhh;
		if first.idhh then do;
			hh_kids16&y = 0;
			hh_kids1718&y = 0;
		end;

		if kids16>0 then hh_kids16&y + 1;
		if kids1718>0 then hh_kids1718&y + 1;

		if last.idhh then output;
		keep idhh hh_kids16&y hh_kids1718&y;
	run;

	data temp.linda&f_or_i&y;
		merge linda&f_or_i&y lindahh&f_or_i&y;
			by idhh;
		drop idhh kids16 kids1718;
	run;

%end;

/* household disposable income is taken from 1994 on */
%if &y>1993 %then %do;

	data temp.hhincome&f_or_i&y;
		set original.linda&f_or_i&y(keep = nybidnr cdisplh);
		rename nybidnr = id cdisplh = inchh_disp&y;
	run;

	proc sort data = temp.hhincome&f_or_i&y;
		by id;
	run;

%end;

proc sort data = temp.linda&f_or_i&y;
	by id;
run;

%end;

/* merge all years and save */
data temp.arrivalyears&f_or_i;
	merge temp.linda&f_or_i.1987-temp.linda&f_or_i.1998 temp.hhincome&f_or_i.1994-temp.hhincome&f_or_i.1998 temp.youngesthh&f_or_i.1987-temp.youngesthh&f_or_i.1998;
		by id;
run;

%mend;
%m1aa(f);
%m1aa(i);

***********************************************************************************************;
* Create individual variables for period 1999 to 2007;

%macro m1a(f_or_i);
/* f_or_i: table suffix (invoked below with f and with i).
   For each year 1999-2007 reads original.linda<f_or_i><year> and writes
   temp.linda<f_or_i><year> holding only the derived, renamed and labelled
   variables; every source variable that was read is dropped on output.

   Fix compared with the previous version: the label 'Parental leave income'
   was attached to LABORINC, which left PARENTALINC unlabelled and LABORINC
   mislabelled. It is now attached to PARENTALINC.                          */
%local y vars_to_keep;
%do y = 1999 %to 2007;

/* Year-specific list of source variables */
%if &y = 1999 %then %let vars_to_keep = &varsallyears &vars1999;
%else %if &y = 2000 %then %let vars_to_keep = &varsallyears &vars2000;
%else %if &y = 2001 %then %let vars_to_keep = &varsallyears &vars2001;
%else %let vars_to_keep = &varsallyears &varspost2001;

data temp.linda&f_or_i&y(drop = &vars_to_keep);
	length id idhh year 8 birthcountry citizencountry $ 40 birthcountry_code citizencountry_code $ 2;
	set original.linda&f_or_i&y(keep = &vars_to_keep);

	if bobjtyp ^= 3;									/* Drop Death Estates */

	id = nybidnr;
	label id = 'Individual id';
	idhh = nybidnrf;
	label idhh = 'Household id';
	year = &y;
	label year = 'Year';
	age = bald;
	label age = 'Age';
	immigr_year = input(binvar,4.);
	label immigr_year = 'Immigration year';
	hh_size = bant;
	label hh_size = 'Household size';
	sex = bkon;
	label sex = 'Gender (1=man, 2=woman)';
	emigrationdate = bemigdat;
	label emigrationdate = 'Emigration date this year';
	deathdate = bdoddat;
	label deathdate = 'Death date this year';
	if BDODDAT > 0 or BCIV = 06 then deceased = 1;
	else deceased = 0;
	label deceased = 'Deceased (dummy)';
	pension = sum(of ppensspl ppenssfl);
	label pension = 'Pension';
	pensioninsu = akupens;
	label pensioninsu = 'Pension insurance';
	studentaid  = sum(of tstudl istudsfl);
	label studentaid = 'Student Aid';
	unemploybenefits = tarbstl;
	label unemploybenefits = 'Unemployment benefits';
	/* CPRIM = primary income; TFOAB = income from a closely held company
	   that is reported as income.
	   NOTE(review): the plus sign makes LABORINC missing when either term is
	   missing, unlike the SUM() used for PENSION - confirm this is intended. */
	laborinc = cprim + tfoab;
	label laborinc = 'Labor income';
	parentalinc = tforpl;
	label parentalinc = 'Parental leave income';
	/* separate comparisons keep the automatic type conversion working
	   whatever the storage type of BCIV is */
	if BCIV = 02 or BCIV = 03 or BCIV = 07 or BCIV = 12 or BCIV = 17 then married = 1;
	else married = 0;
	label married = 'Married (dummy)';
	familyrelation = substr(BFAMST,1,1);
	label familyrelation = 'Role in the family (1=Partner,2=Single parent,3=Child,4=Single,0=Others)';
	inc = cdispl;
	label inc = 'Disposable income';
	inchh = cdisplh;
	label inchh = 'Disposable income HH (by LINDA)';
	region = BLKFNOV;
	label region = 'Region code';

	/* Occupation: exactly one of the four dummies is set to 1. The tests are
	   tried in this order: unemployment benefits received, pension larger
	   than labor plus parental income, student aid above 95 percent of the
	   one-semester maximum for the year, positive labor plus parental
	   income. Everybody else is counted as unemployed. */
	unemployed = 0;
	retired = 0;
	student = 0;
	employed = 0;
	if (unemploybenefits > 0) then unemployed = 1;
	else if (pension > (laborinc + parentalinc)) then retired = 1;
	else if (studentaid > (.95*&&maxstud&y)) then student = 1;
	else if ((laborinc + parentalinc) > 0) then employed = 1;
	else unemployed = 1;
	label unemployed = 'Unemployed (dummy)';
	label retired = 'Retired (dummy)';
	label student = 'Student (dummy)';
	label employed = 'Employed (dummy)';

	sni_code = input(BKUNGR,best5.);
	label sni_code = 'Industrial code (SNI)';
	if substr(BKUINST,1,1) = '2' and employed then finsector = 1;
	else finsector = 0;
	label finsector = 'Work in financial sector (dummy)';

	/* Government sector: decided by the fourth character of BKUINST; the set
	   of qualifying values gains '3' from 2002 on */
	bkuinst4 = substr(BKUINST,4,1);
	%if &y <= 2001 %then %do;
		if bkuinst4 in ('1','2') and employed then govsector = 1;
		else govsector = 0;
	%end;
	%else %do;
		if bkuinst4 in ('1','2','3') and employed then govsector = 1;
		else govsector = 0;
	%end;
	label govsector = 'Work in government sector (dummy)';

	/* Country of birth and of citizenship. Up to 2001 only codes are read
	   and the name variables are left blank; code 'CS' (Czechoslovakia) is
	   recoded to 'CE'. In 2001 the citizenship code is read from BNATIONT
	   instead of BNATION. From 2002 on names and codes are both read. */
	%if &y <= 2001 %then %do;
		birthcountry = '';
		citizencountry = '';
		birthcountry_code = bfoland;
		if birthcountry_code = 'CS' then birthcountry_code = 'CE';
		%if &y = 2001 %then %do;
			citizencountry_code = bnationt;
		%end;
		%else %do;
			citizencountry_code = bnation;
		%end;
		if citizencountry_code = 'CS' then citizencountry_code = 'CE';
	%end;
	%else %do;
		birthcountry = bfland;
		citizencountry = bnat;
		birthcountry_code = bfodln;
		citizencountry_code = bnation;
	%end;
	label birthcountry = 'Country of birth';
	label citizencountry = 'Country of citizenship';
	label birthcountry_code = 'Country of birth (Code)';
	label citizencountry_code = 'Country of citizenship (Code)';

	/* Education codes: 1999 carries only the old code; later years carry the
	   orientation and level codes and the old code is set to 0 */
	%if &y = 1999 %then %do;
		edu_old_code = input(bsun,best5.);
		eduorient_code = '    ';
		edulevel_code = '   ';
	%end;
	%else %do;
		edu_old_code = 0;
		eduorient_code = bsuninr;
		edulevel_code = bsunniv;
	%end;
	label edu_old_code = 'Education old code';
	label eduorient_code = 'Education orientation code';
	label edulevel_code = 'Education level code';

	nSampled_inHHf = bantf;
	label nSampled_inHHf = 'Number of sampled in HH (regular LINDA)';
	nImmigr_inHH = bantui;
	label nImmigr_inHH = 'Number of immigrants in HH';
	nSampled_inHHi = banti;
	label nSampled_inHHi = 'Number of sampled in HH (immigrant LINDA)';
	sampled_f = burvkodf;
	label sampled_f = 'Sampled (regular LINDA)';
	sampled_i = burvkodi;
	label sampled_i = 'Sampled (immigrant LINDA)';
	drop bkuinst4;
run;

%end;
%mend;
%m1a(f);
%m1a(i);

***********************************************************************************************
* STEP 2
* Construct a single birthcountry variable in English for all years.
  The same for citizen country.
***********************************************************************************************;

%macro m1b(f_or_i);
/* f_or_i: table suffix (invoked below with i and then with f).
   Adds birthcountry_eng / birthcountry_code_eng and citizencountry_eng /
   citizencountry_code_eng to every temp.linda<f_or_i><year> by looking the
   person up in keys.country_key: by Swedish country code for 1999-2001 and
   by Swedish country name for 2002-2007. Rows of temp.linda without a match
   are kept. Note that keys.country_key is re-sorted in place.            */

/* ---- 1999-2001: match on the Swedish country code ------------------- */
proc sort data = keys.country_key;
	by country_code_swe;
run;

%do y = 1999 %to 2001;
	%do k = 1 %to 2;
		%let who = %scan(birth citizen, &k);	/* birth first, then citizen */

		proc sort data = temp.linda&f_or_i&y;
			by &who.country_code;
		run;

		data temp.linda&f_or_i&y;
			merge temp.linda&f_or_i&y(in = from_linda)
				  keys.country_key(keep = country_code_swe country_code_eng country_eng
								   rename=(country_code_swe = &who.country_code
										   country_eng = &who.country_eng
										   country_code_eng = &who.country_code_eng));
			by &who.country_code;
			if from_linda;
		run;
	%end;
%end;

/* ---- 2002-2007: match on the Swedish country name ------------------- */
proc sort data = keys.country_key;
	by country_swe;
run;

%do y = 2002 %to 2007;
	%do k = 1 %to 2;
		%let who = %scan(birth citizen, &k);	/* birth first, then citizen */

		proc sort data = temp.linda&f_or_i&y;
			by &who.country;
		run;

		data temp.linda&f_or_i&y;
			merge temp.linda&f_or_i&y(in = from_linda)
				  keys.country_key(keep = country_swe country_code_eng country_eng
								   rename=(country_swe = &who.country
										   country_eng = &who.country_eng
										   country_code_eng = &who.country_code_eng));
			by &who.country;
			if from_linda;
		run;
	%end;
%end;

%mend;
%m1b(i);
%m1b(f);

* Replace the original country variables by their English counterparts;
%macro m1c(f_or_i);
/* f_or_i: table suffix (invoked below with f and with i).
   For 1999-2007 the four original country variables are dropped while the
   table is read and the *_eng variables take over their names. The LENGTH
   statements come before SET so that they fix the column order and the
   lengths of the seven variables they list.                              */
%do y = 1999 %to 2007;
	data temp.linda&f_or_i&y;
		length id idhh year 8;
		length birthcountry citizencountry $ 40;
		length birthcountry_code citizencountry_code $ 2;
		set temp.linda&f_or_i&y(
				drop   = birthcountry birthcountry_code citizencountry citizencountry_code
				rename = (birthcountry_eng = birthcountry
						  birthcountry_code_eng = birthcountry_code
						  citizencountry_eng = citizencountry
						  citizencountry_code_eng = citizencountry_code));
		label birthcountry        = 'Country of birth'
			  citizencountry      = 'Country of citizenship'
			  birthcountry_code   = 'Country of birth (Code)'
			  citizencountry_code = 'Country of citizenship (Code)';
	run;
%end;
%mend;
%m1c(f);
%m1c(i);


***********************************************************************************************
* STEP 3
* Construct two education variables: edulev and eduorient. 1999 is handled separately.
***********************************************************************************************;

%macro m1d(f_or_i);
/* f_or_i: table suffix (invoked below with f and with i).
   Adds EDULEV and EDUORIENT to temp.linda<f_or_i><year>:
     1999       looked up in keys.edu99_key by the old education code;
     2000-2007  first character of edulevel_code and of eduorient_code.
   Missing values are set to '9' in both cases.

   Change compared with the previous version: the key table is sorted by the
   merge key into a WORK copy first. MERGE with BY needs both inputs in key
   order, and nothing here guarantees that for keys.edu99_key as stored.   */
%local y;

/* ---- 1999 ------------------------------------------------------------ */
%let y = 1999;
proc sort data = temp.linda&f_or_i&y;
	by edu_old_code;
run;

proc sort data = keys.edu99_key out = work.edu99_key_sorted;
	by bsun;
run;

data temp.linda&f_or_i&y;
	merge temp.linda&f_or_i&y(in = left) work.edu99_key_sorted(rename = (bsun = edu_old_code));
	by edu_old_code;
	if left;
	label edulev = 'Education level';
	label eduorient = 'Education orientation';
	if missing(edulev) then edulev = '9';
	if missing(eduorient) then eduorient = '9';
run;

/* ---- 2000-2007 ------------------------------------------------------- */
%do y = 2000 %to 2007;

data temp.linda&f_or_i&y;
	length id idhh year 8 birthcountry citizencountry $ 40 edulev eduorient $ 2;
	set temp.linda&f_or_i&y;
	edulev = substr(edulevel_code,1,1);
	eduorient = substr(eduorient_code,1,1);
	label edulev = 'Education level';
	label eduorient = 'Education orientation';
	if missing(edulev) then edulev = '9';
	if missing(eduorient) then eduorient = '9';
run;
%end;

%mend;
%m1d(f);
%m1d(i);

* Remove the raw education code variables once edulev and eduorient exist;
%macro m1e(f_or_i);
/* f_or_i: table suffix (invoked below with f and with i).
   Rewrites temp.linda<f_or_i><year> for 1999-2007 without edu_old_code,
   eduorient_code and edulevel_code.                                      */
%do y = 1999 %to 2007;
	data temp.linda&f_or_i&y (drop = edu_old_code eduorient_code edulevel_code);
		set temp.linda&f_or_i&y;
	run;
%end;
%mend;
%m1e(f);
%m1e(i);


***********************************************************************************************
* STEP 4
* Create spouses' country of birth and country of citizenship variables
***********************************************************************************************;

%macro m1f(f_or_i);
/* f_or_i: table suffix (invoked below with f and with i).
   For 1999-2007, adds idspouse and the sp_* country variables to every
   person living in a household with exactly two members whose
   familyrelation is '1'. Everybody else gets missing values.
   temp.twop is used as a scratch table and is overwritten for each year;
   temp.linda<f_or_i><year> ends up sorted by id.                         */
%do y = 1999 %to 2007;

	proc sort data = temp.linda&f_or_i&y;
		by idhh id;
	run;

	/* households with exactly two partners: one row per household */
	data temp.twop (keep = idhh);
		length partnercount 8;
		set temp.linda&f_or_i&y(keep = idhh familyrelation);
		by idhh;
		if first.idhh then partnercount = 0;
		partnercount + (familyrelation = '1');	/* the sum statement retains the counter */
		if last.idhh and partnercount = 2;
	run;

	/* the two partner rows of each of those households */
	data temp.twop;
		merge temp.twop(in = two_partner_hh)
			  temp.linda&f_or_i&y(keep = id idhh birthc: citizenc: familyrelation
								  where = (familyrelation = '1'));
		by idhh;
		if two_partner_hh;
	run;

	/* Cross the two rows: remember the first partner, and on the second row
	   write (second partner, spouse = first) followed by
	   (first partner, spouse = second) */
	data temp.twop (keep = id idhh idspouse sp:);
		length idspouse 8 sp_birthcountry sp_citizencountry $ 40 sp_birthcountry_code sp_citizencountry_code $ 2;
		set temp.twop;
		by idhh;
		length p1_id 8 p1_birth p1_citizen $ 40 p1_birth_code p1_citizen_code $ 2;
		retain p1_id p1_birth p1_citizen p1_birth_code p1_citizen_code;

		if first.idhh then do;
			p1_id           = id;
			p1_birth        = birthcountry;
			p1_citizen      = citizencountry;
			p1_birth_code   = birthcountry_code;
			p1_citizen_code = citizencountry_code;
		end;

		if last.idhh then do;
			/* current row is the second partner: the spouse is the first */
			idspouse               = p1_id;
			sp_birthcountry        = p1_birth;
			sp_citizencountry      = p1_citizen;
			sp_birthcountry_code   = p1_birth_code;
			sp_citizencountry_code = p1_citizen_code;
			output;

			/* mirrored row for the first partner: the spouse is the current row */
			idspouse               = id;
			id                     = p1_id;
			sp_birthcountry        = birthcountry;
			sp_citizencountry      = citizencountry;
			sp_birthcountry_code   = birthcountry_code;
			sp_citizencountry_code = citizencountry_code;
			output;
		end;
	run;

	proc sort data = temp.twop;
		by id;
	run;
	proc sort data = temp.linda&f_or_i&y;
		by id;
	run;

	/* attach the spouse information; persons without a row in twop are kept */
	data temp.linda&f_or_i&y;
		merge temp.linda&f_or_i&y(in = from_linda) temp.twop;
		by id;
		if from_linda;
		label sp_birthcountry        = 'Spouse country of birth'
			  sp_birthcountry_code   = 'Spouse country of birth (code)'
			  sp_citizencountry      = 'Spouse country of citizenship'
			  sp_citizencountry_code = 'Spouse country of citizenship (code)';
	run;
%end;
%mend;
%m1f(f);
%m1f(i);

***********************************************************************************************;
